#!/usr/bin/env python
# -*- coding:utf8 -*-

"""
@version: 
@author: lh
@license: Apache Licence 
@contact: liuhuan0672@gmail.com
@site: 
@software: PyCharm
@file: 0006.py
@time: 2016/1/20 14:36

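Problem 0006: You have a directory containing a month of your diary entries, all of them .txt files. To avoid word-segmentation issues, assume the content is entirely in English. Report the word you consider most important in each diary entry.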
"""
import re
from collections import Counter


# A word is a maximal run of word characters that are not digits (letters and
# underscores).  Matching the words directly, instead of splitting on a
# separator pattern that can match the empty string, avoids both the
# per-character split performed by ``re.split`` on Python 3.7+ and the stray
# empty-string tokens produced at the edges of the text.
_WORD_PATTERN = re.compile(r'[^\W\d]+')

# Output templates indexed by rank: 0 is the most frequent word, 1 the runner-up.
_RANK_TEMPLATES = (
    'The most word in "%s" is "%s",it appears %s times.',
    'The second most word in "%s" is "%s",it appears %s times.',
)


def _count_words(text):
    """Return a Counter mapping each word found in *text* to its frequency."""
    return Counter(_WORD_PATTERN.findall(text))


def important_word(target_file):
    """Print the two most frequent words of a text file.

    The file is read in full and tokenised into words (runs of letters and
    underscores; digits, punctuation and whitespace act as separators).
    Counting is case-sensitive.  One line is printed per reported rank, so a
    file with a single distinct word produces one line, and a file without
    any word produces a short notice instead of raising ``IndexError``.
    Words with equal counts are reported in first-seen order (Python 3.7+).

    :param target_file: path of the text file to analyse.
    :return: None; the result is written to standard output.
    :raises IOError/OSError: if the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(target_file, 'r') as file_obj:
        file_content = file_obj.read()

    ranking = _count_words(file_content).most_common(len(_RANK_TEMPLATES))
    if not ranking:
        print('No words found in "%s".' % target_file)
        return

    # zip() stops at the shorter sequence, so only the ranks that actually
    # exist are printed.
    for template, (word, count) in zip(_RANK_TEMPLATES, ranking):
        print(template % (target_file, word, count))


if __name__ == '__main__':
    # Script entry point: analyse the bundled sample text file.
    sample_path = './static/0004.txt'
    important_word(sample_path)
